
%% Created for Tom Alterman at 2009-05-24 22:22:31 +0100 


%% Saved with string encoding Unicode (UTF-8) 



% Journal article. Journal name capitalised as a proper name; title no longer
% wrapped in a second pair of braces so styles may apply their own casing.
@article{yangknn,
  author    = {Yang, Yiming},
  title     = {An evaluation of statistical approaches to text categorization},
  journal   = {Information Retrieval},
  volume    = {1},
  number    = {1},
  pages     = {69--90},
  publisher = {Springer},
  year      = {1999},
}

% Edited book. The exported record had split the book title across the Title
% and Journal fields and filed it as an article.
@book{michie,
  editor    = {Michie, Donald and Spiegelhalter, David J. and Taylor, Charles C.},
  title     = {Machine Learning, Neural and Statistical Classification},
  publisher = {Ellis Horwood},
  year      = {1994},
}

% Conference paper published in the LNCS series (volume 1398); LNCS is a book
% series, not a journal, so it belongs in the series field.
% NOTE(review): page range kept as exported -- confirm against the proceedings.
@inproceedings{lewis,
  author    = {Lewis, David D.},
  title     = {Naive ({Bayes}) at forty: The independence assumption in information retrieval},
  booktitle = {Machine Learning: {ECML}-98},
  series    = {Lecture Notes in Computer Science},
  volume    = {1398},
  pages     = {4--18},
  publisher = {Springer},
  year      = {1998},
}

% Book chapter. The exported record had merged authors, journal, publisher and
% pages from unrelated works (one author even appeared twice) and had no year.
% NOTE(review): page range omitted because the exported one belonged to another
% record -- add it once checked against the book.
@incollection{knerr,
  author    = {Knerr, S. and Personnaz, L. and Dreyfus, G.},
  title     = {Single-layer learning revisited: A stepwise procedure for building and training a neural network},
  booktitle = {Neurocomputing: Algorithms, Architectures and Applications},
  editor    = {Fogelman Souli{\'e}, Fran{\c{c}}oise and H{\'e}rault, Jeanny},
  publisher = {Springer-Verlag},
  year      = {1990},
}

% Journal article. Given names written out: glued initials such as "C.W." are a
% single name token to BibTeX, so abbreviating styles would print only "C.".
@article{hsu,
  author  = {Hsu, Chih-Wei and Lin, Chih-Jen},
  title   = {A comparison of methods for multiclass support vector machines},
  journal = {IEEE Transactions on Neural Networks},
  volume  = {13},
  number  = {2},
  pages   = {415--425},
  year    = {2002},
}

% NIPS 12 proceedings paper (not a journal article; the exported issue number
% was spurious). The acronym in the title is brace-protected individually.
@inproceedings{platt,
  author    = {Platt, John C. and Cristianini, Nello and Shawe-Taylor, John},
  title     = {Large margin {DAGs} for multiclass classification},
  booktitle = {Advances in Neural Information Processing Systems 12},
  pages     = {547--553},
  publisher = {MIT Press},
  year      = {2000},
}

% Technical report with exactly three authors (the exported "and others" was
% spurious).
@techreport{prac,
  author      = {Hsu, Chih-Wei and Chang, Chih-Chung and Lin, Chih-Jen},
  title       = {A practical guide to support vector classification},
  institution = {Department of Computer Science, National Taiwan University},
  year        = {2003},
}

% Textbook. Publisher name and book title capitalised as proper names.
@book{intro,
  author    = {Cristianini, Nello and Shawe-Taylor, John},
  title     = {An Introduction to Support Vector Machines},
  publisher = {Cambridge University Press},
  year      = {2000},
}

% Workshop paper. Publisher and its city are split into their own fields
% instead of being concatenated inside organization.
@inproceedings{boser,
  author    = {Boser, Bernhard E. and Guyon, Isabelle M. and Vapnik, Vladimir N.},
  title     = {A training algorithm for optimal margin classifiers},
  booktitle = {Proceedings of the Fifth Annual Workshop on Computational Learning Theory},
  pages     = {144--152},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {1992},
}

% Journal article (the key is historical; first author is Cortes).
@article{vapnik,
  author    = {Cortes, Corinna and Vapnik, Vladimir},
  title     = {Support-vector networks},
  journal   = {Machine Learning},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  publisher = {Springer},
  year      = {1995},
}

% Journal article. Journal name expanded from the upper-case abbreviation;
% trailing period removed from the title (styles add their own punctuation).
@article{tanaka,
  author  = {Tanaka, E. and Tanaka, K.},
  title   = {The tree-to-tree editing problem},
  journal = {International Journal of Pattern Recognition and Artificial Intelligence},
  volume  = {2},
  number  = {2},
  pages   = {221--240},
  year    = {1988},
}

% Journal article. The journal name had been garbled by character-recognition
% errors ("Machm(c) ry") and is restored here.
@article{tai,
  author  = {Tai, Kuo-Chung},
  title   = {The tree-to-tree correction problem},
  journal = {Journal of the Association for Computing Machinery},
  volume  = {26},
  number  = {3},
  pages   = {422--433},
  year    = {1979},
}

% Patent record. Initials are separated by spaces so BibTeX sees each one as
% its own name token. The month field concatenates the nov macro with the day.
@misc{fompatent,
  author    = {Chen, J. L. and Yang, Y. and Zhang, H. J.},
  title     = {Function-based object model for use in website adaptation},
  note      = {US Patent 7,458,017},
  month     = nov # {~25},
  year      = {2008},
  publisher = {Google Patents},
}

% WWW '01 paper. The export had a doubled comma in every author name
% ("Chen,, Jinlin"); each name is now in plain "Last, First" form.
% NOTE(review): "Fengwu, Qiu" kept as exported -- confirm which part is the surname.
@inproceedings{fom,
  author    = {Chen, Jinlin and Zhou, Baoyao and Shi, Jin and Zhang, Hongjiang and Fengwu, Qiu},
  title     = {Function-based object model towards website adaptation},
  booktitle = {WWW '01: Proceedings of the 10th international conference on World Wide Web},
  pages     = {587--596},
  publisher = {ACM},
  address   = {New York, NY, USA},
  location  = {Hong Kong, Hong Kong},
  isbn      = {1-58113-348-0},
  year      = {2001},
}

% Conference paper. Publisher and city moved out of the organization field.
@inproceedings{percep,
  author    = {Chen, J. and Xiao, K.},
  title     = {Perception-oriented online news extraction},
  booktitle = {Proceedings of the 8th ACM/IEEE-CS joint conference on Digital libraries},
  pages     = {363--366},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2008},
}

% Master's thesis; date-added / date-modified are reference-manager bookkeeping
% fields that bibliography styles ignore.
@mastersthesis{datamaps,
  author        = {Read, Edward},
  title         = {Visualising Spatial Information Using Data Maps},
  school        = {Bristol University},
  year          = {2007},
  date-added    = {2009-05-20 02:59:20 +0100},
  date-modified = {2009-05-20 03:00:24 +0100},
}

% Journal article. The publisher field held a full postal address with a
% trailing comma; only the publisher name is kept. Given names written out.
@article{entityweb,
  author    = {Etzioni, Oren and Cafarella, Michael and Downey, Doug and Popescu, Ana-Maria and Shaked, Tal and Soderland, Stephen and Weld, Daniel S. and Yates, Alexander},
  title     = {Unsupervised named-entity extraction from the web: An experimental study},
  journal   = {Artificial Intelligence},
  volume    = {165},
  number    = {1},
  pages     = {91--134},
  publisher = {Elsevier},
  year      = {2005},
}

% Workshop paper. Morgan Kaufmann is the publisher, not a sponsoring
% organization.
% NOTE(review): only a start page was exported -- confirm the full range.
@inproceedings{entitynews,
  author    = {Miller, D. and Schwartz, R. and Weischedel, R. and Stone, R.},
  title     = {Named entity extraction from broadcast news},
  booktitle = {Broadcast News Workshop'99 Proceedings},
  pages     = {37},
  publisher = {Morgan Kaufmann},
  year      = {1999},
}

% Magazine article. The exported record lacked the journal, which article
% entries require; venue details added.
% NOTE(review): volume/number/pages supplied from memory of the ACM Queue issue -- confirm.
@article{entitytext,
  author    = {McCallum, Andrew},
  title     = {Information extraction: distilling structured data from unstructured text},
  journal   = {ACM Queue},
  volume    = {3},
  number    = {9},
  pages     = {48--57},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2005},
}

	% Same paper as the entry keyed fom; key kept so existing citations resolve.
	% Publisher and city moved out of the organization field.
	@inproceedings{fomconv,
	  author    = {Chen, J. and Zhou, B. and Shi, J. and Zhang, H. and Fengwu, Q.},
	  title     = {Function-based object model towards website adaptation},
	  booktitle = {Proceedings of the 10th international conference on World Wide Web},
	  pages     = {587--596},
	  publisher = {ACM},
	  address   = {New York, NY, USA},
	  year      = {2001},
	}


% Workshop paper. The exported author list repeated the same three authors
% twice, and the booktitle began with "In", which styles add themselves.
@inproceedings{decision,
  author    = {Apte, C. and Damerau, F. and Weiss, S. M.},
  title     = {Text mining with decision rules and decision trees},
  booktitle = {Workshop on Learning from Text and the Web, Conference on Automated Learning and Discovery},
  year      = {1998},
}

% ICML 1997 paper. The exported booktitle ("Machine Learning-International
% Workshop then Conference") was a catalogue series label, not the proceedings
% title.
@inproceedings{compstudy,
  author    = {Yang, Yiming and Pedersen, Jan O.},
  title     = {A comparative study on feature selection in text categorization},
  booktitle = {Proceedings of the Fourteenth International Conference on Machine Learning ({ICML} '97)},
  pages     = {412--420},
  publisher = {Morgan Kaufmann},
  year      = {1997},
}

% CIKM '02 paper. Doubled commas removed from author names; the DOI embedded in
% the reference-manager URL is also exposed in a proper doi field.
@inproceedings{highperform,
  author     = {Rogati, Monica and Yang, Yiming},
  title      = {High-performing feature selection for text classification},
  booktitle  = {CIKM '02: Proceedings of the eleventh international conference on Information and knowledge management},
  pages      = {659--661},
  publisher  = {ACM},
  address    = {New York, NY, USA},
  location   = {McLean, Virginia, USA},
  isbn       = {1-58113-492-4},
  doi        = {10.1145/584792.584911},
  year       = {2002},
  bdsk-url-1 = {http://doi.acm.org/10.1145/584792.584911},
}

% Journal article (the 2006 volume/pages are as exported). The publisher field
% held a postal address with a trailing comma; only the name is kept.
@article{porter,
  author    = {Porter, Martin F.},
  title     = {An algorithm for suffix stripping},
  journal   = {Program: Electronic Library and Information Systems},
  volume    = {40},
  number    = {3},
  pages     = {211--218},
  publisher = {Emerald},
  year      = {2006},
}

% CIKM 1999 paper. Acronym in the title is brace-protected individually;
% publisher and city moved out of the organization field.
% NOTE(review): given names expanded from initials -- confirm spelling.
@inproceedings{portpop,
  author    = {Hsu, Wen-Lin and Lang, Sheau-Dong},
  title     = {Classification algorithms for {NETNEWS} articles},
  booktitle = {Proceedings of the eighth international conference on Information and knowledge management},
  pages     = {114--121},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {1999},
}

% IJCNN 2003 paper. The booktitle was in inverted catalogue order
% ("Neural Networks, 2003. Proceedings of the ... on") and is put in reading order.
% NOTE(review): no page range was exported -- add one.
@inproceedings{sw,
  author    = {Silva, C. and Ribeiro, B.},
  title     = {The importance of stop word removal on recall values in text categorization},
  booktitle = {Proceedings of the International Joint Conference on Neural Networks ({IJCNN} 2003)},
  volume    = {3},
  year      = {2003},
}

% SIGIR 1998 paper. Initials separated so each is its own name token;
% publisher and city moved out of the organization field.
@inproceedings{synonyms,
  author    = {Baker, L. Douglas and McCallum, Andrew K.},
  title     = {Distributional clustering of words for text classification},
  booktitle = {Proceedings of the 21st annual international ACM SIGIR conference on Research and development in information retrieval},
  pages     = {96--103},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {1998},
}

% Conference paper; inproceedings is the canonical name of the conference
% entry type, so the rendered reference is unchanged.
@inproceedings{dumais,
  author    = {Dumais, S. and Platt, J. and Heckerman, D. and Sahami, M.},
  title     = {{Inductive learning algorithms and representations for text categorization}},
  booktitle = {Proceedings of the seventh international conference on Information and knowledge management},
  pages     = {148--155},
  year      = {1998},
}
	

	% Cornell University technical report. It was filed as an article with no
	% journal, which is a required field; the issuing body goes in institution.
	@techreport{termimp,
	  author      = {Salton, G. and Yang, C. S. and Yu, C. T.},
	  title       = {A theory of term importance in automatic text analysis},
	  institution = {Cornell University},
	  year        = {1974},
	}

% Cornell University technical report. It was filed as an article with no
% journal, which is a required field; the issuing body goes in institution.
@techreport{salton,
  author      = {Salton, G. and Buckley, C.},
  title       = {Term weighting approaches in automatic text retrieval},
  institution = {Cornell University},
  year        = {1987},
}

% Survey article. Journal name capitalised and stripped of the "(CSUR)" suffix;
% publisher city split into its own field.
@article{mlintextcat,
  author    = {Sebastiani, Fabrizio},
  title     = {Machine learning in automated text categorization},
  journal   = {ACM Computing Surveys},
  volume    = {34},
  number    = {1},
  pages     = {1--47},
  publisher = {ACM},
  address   = {New York, NY, USA},
  year      = {2002},
}

% COLING '04 paper. Doubled commas removed from author names; the umlaut is
% written as a BibTeX special character ({\"o}) so sorting and labels treat it
% as one letter; the DOI from the reference-manager URL gets its own field.
@inproceedings{concepts,
  author     = {Sahlgren, Magnus and C{\"o}ster, Rickard},
  title      = {Using bag-of-concepts to improve the performance of support vector machines in text categorization},
  booktitle  = {COLING '04: Proceedings of the 20th international conference on Computational Linguistics},
  pages      = {487},
  publisher  = {Association for Computational Linguistics},
  address    = {Morristown, NJ, USA},
  location   = {Geneva, Switzerland},
  doi        = {10.3115/1220355.1220425},
  year       = {2004},
  bdsk-url-1 = {http://dx.doi.org/10.3115/1220355.1220425},
}

	% Conference paper that had been filed as an article with no journal.
	% Acronyms in the title restored to upper case and brace-protected.
	% NOTE(review): venue and pages supplied from the CIKM 2008 proceedings -- confirm.
	@inproceedings{bns,
	  author    = {Forman, George},
	  title     = {{BNS} feature scaling: an improved representation over {TF-IDF} for {SVM} text classification},
	  booktitle = {Proceedings of the 17th {ACM} Conference on Information and Knowledge Management ({CIKM} '08)},
	  pages     = {263--270},
	  publisher = {ACM},
	  address   = {New York, NY, USA},
	  year      = {2008},
	}

% COMPSAC 2003 paper. The booktitle was in inverted catalogue order and is
% rewritten in reading order.
@inproceedings{vwrap,
  author    = {Meng, X. and Wang, H. and Hu, D. and Li, C.},
  title     = {A supervised visual wrapper generator for web-data extraction},
  booktitle = {Proceedings of the 27th Annual International Computer Software and Applications Conference ({COMPSAC} 2003)},
  pages     = {657--662},
  year      = {2003},
}

% AAAI 2007 paper. The organization field held a semicolon-separated dump of
% cities, publishers and a year; it is replaced by publisher/address. The
% exported "volume 22" was the conference ordinal (now in the booktitle) and the
% exported "number 2" the proceedings volume; standard styles reject having
% both volume and number on a proceedings entry.
% NOTE(review): only a start page was exported -- confirm the full range.
@inproceedings{vs,
  author    = {Zheng, S. and Song, R. and Wen, J.},
  title     = {Template-independent news extraction based on visual consistency},
  booktitle = {Proceedings of the Twenty-Second {AAAI} Conference on Artificial Intelligence},
  volume    = {2},
  pages     = {1507},
  publisher = {AAAI Press},
  address   = {Menlo Park, CA},
  year      = {2007},
}

% WWW 2004 paper. Initials given periods and spaces ("DC" became "D. C.") so
% BibTeX sees two initials rather than one two-letter given name.
@inproceedings{treeedit,
  author        = {Reis, D. C. and Golgher, P. B. and Silva, A. S. and Laender, A. F.},
  title         = {Automatic web news extraction using tree edit distance},
  booktitle     = {Proceedings of the 13th international conference on World Wide Web},
  pages         = {502--511},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  year          = {2004},
  date-added    = {2009-05-16 23:13:24 +0100},
  date-modified = {2009-05-16 23:13:45 +0100},
}

% WWW 2003 paper. The acronyms DOM and HTML are brace-protected so
% sentence-casing styles do not lower-case them.
@inproceedings{domtree,
  author        = {Gupta, S. and Kaiser, G. and Neistadt, D. and Grimm, P.},
  title         = {{DOM}-based content extraction of {HTML} documents},
  booktitle     = {Proceedings of the 12th international conference on World Wide Web},
  pages         = {207--214},
  publisher     = {ACM},
  address       = {New York, NY, USA},
  year          = {2003},
  date-added    = {2009-05-16 20:34:37 +0100},
  date-modified = {2009-05-16 20:34:49 +0100},
}

% Journal article. Issue range uses an en-dash (--); journal name capitalised
% with its acronym protected; only "Web" is brace-protected in the title.
@article{pagerank,
  author    = {Brin, Sergey and Page, Lawrence},
  title     = {The anatomy of a large-scale hypertextual {Web} search engine},
  journal   = {Computer Networks and {ISDN} Systems},
  volume    = {30},
  number    = {1--7},
  pages     = {107--117},
  publisher = {Elsevier},
  year      = {1998},
}

% Web page reference. Field values are unchanged.
% NOTE(review): lastchecked/urldate say 14/05/09 but the note says 13/05/09 --
% decide which access date is correct.
@misc{googlenews,
  author        = {Google},
  title         = {About Google News},
  url           = {http://news.google.com/intl/en_us/about_google_news.html},
  urldate       = {14/05/09},
  lastchecked   = {14/05/09},
  note          = {Last Accessed 13/05/09},
  year          = {2009},
  date-added    = {2009-05-16 02:43:33 +0100},
  date-modified = {2009-05-16 15:38:26 +0100},
  bdsk-url-1    = {http://news.google.com/intl/en_us/about_google_news.html},
}

% Blog post reference. The author is written in "Last, First" form, which
% BibTeX parses the same as "First Last" for this two-word name.
@misc{declinelocal,
  author        = {Stone, Brad},
  title         = {The Decline of Local News on the Net?},
  url           = {http://bits.blogs.nytimes.com/2007/08/16/the-decline-of-local-news-on-the-net/},
  lastchecked   = {13/05/09},
  note          = {Last Accessed 13/05/09},
  year          = {2007},
  date-added    = {2009-05-13 00:55:25 +0100},
  date-modified = {2009-05-16 15:37:02 +0100},
  bdsk-url-1    = {http://bits.blogs.nytimes.com/2007/08/16/the-decline-of-local-news-on-the-net/},
}

% ECML-98 paper. The exported title was cut off after "many relevant", and the
% two volume editors had been listed as co-authors; they are moved to editor.
% NOTE(review): series volume and page range supplied from the published
% proceedings -- confirm.
@inproceedings{joa,
  author        = {Joachims, Thorsten},
  title         = {Text categorization with support vector machines: Learning with many relevant features},
  booktitle     = {Machine Learning: {ECML}-98, 10th European Conference on Machine Learning, Chemnitz, Germany},
  editor        = {N{\'e}dellec, Claire and Rouveirol, C{\'e}line},
  series        = {Lecture Notes in Computer Science},
  volume        = {1398},
  pages         = {137--142},
  publisher     = {Springer},
  year          = {1998},
  date-added    = {2009-04-24 11:06:29 +0100},
  date-modified = {2009-04-24 11:06:29 +0100},
}
	
	% Software manual. Authors are in "Last, First" form, which parses the same
	% as the original "First Last" spelling; the note is reproduced verbatim.
	@manual{libsvm,
	  author = {Chang, Chih-Chung and Lin, Chih-Jen},
	  title  = {{LIBSVM}: a library for support vector machines},
	  note   = {Software available at \url{http://www.csie.ntu.edu.tw/~cjlin/libsvm}},
	  year   = {2001},
	}
	
	% Journal article; the entry keyed dogeo describes the same work. Initials
	% are separated by spaces so BibTeX sees each as its own name token.
	@article{silva2006ags,
	  author    = {Silva, M. J. and Martins, B. and Chaves, M. and Afonso, A. P. and Cardoso, N.},
	  title     = {Adding geographic scopes to web resources},
	  journal   = {Computers, Environment and Urban Systems},
	  volume    = {30},
	  number    = {4},
	  pages     = {378--399},
	  publisher = {Elsevier},
	  year      = {2006},
	}
	% Journal article. The corpus name RCV1 is an acronym and is restored to
	% upper case with brace protection; publisher city split into address.
	@article{lewis2004rnb,
	  author    = {Lewis, David D. and Yang, Yiming and Rose, Tony G. and Li, Fan},
	  title     = {{RCV1}: A new benchmark collection for text categorization research},
	  journal   = {Journal of Machine Learning Research},
	  volume    = {5},
	  pages     = {361--397},
	  publisher = {MIT Press},
	  address   = {Cambridge, MA, USA},
	  year      = {2004},
	}

	% IJCAI 2005 paper. The exported "volume 19" was the conference ordinal and
	% is folded into the booktitle; the exported organization named a
	% distributor unrelated to these proceedings and is dropped.
	% NOTE(review): end page supplied from the published proceedings -- confirm.
	@inproceedings{world,
	  author    = {Gabrilovich, Evgeniy and Markovitch, Shaul},
	  title     = {Feature generation for text categorization using world knowledge},
	  booktitle = {Proceedings of the Nineteenth International Joint Conference on Artificial Intelligence ({IJCAI}-05)},
	  pages     = {1048--1053},
	  year      = {2005},
	}
	
	% Conference paper that had been filed as an article with no journal.
	% "Wikipedia" is brace-protected as a proper noun.
	% NOTE(review): venue and pages supplied from the KDD 2008 proceedings -- confirm.
	@inproceedings{wiki,
	  author    = {Wang, Pu and Domeniconi, Carlotta},
	  title     = {Building semantic kernels for text classification using {Wikipedia}},
	  booktitle = {Proceedings of the 14th {ACM} {SIGKDD} International Conference on Knowledge Discovery and Data Mining},
	  pages     = {713--721},
	  publisher = {ACM},
	  address   = {New York, NY, USA},
	  year      = {2008},
	}
	
	% Survey paper; field values are unchanged.
	% NOTE(review): the journal field names a proceedings, so this is presumably
	% a conference paper and "volume 2" may be spurious -- confirm before retyping.
	@article{layout,
	  author  = {Lok, S. and Feiner, S.},
	  title   = {{A survey of automated layout techniques for information presentations}},
	  journal = {Proceedings of SmartGraphics},
	  volume  = {2},
	  year    = {2001},
	}
	
	% Technical report. The report series, number and institution had been
	% packed into the journal field and are split into their own fields.
	% NOTE(review): the exported second author "Bld, N.M." looks like a fragment
	% of the department's street address rather than a person, so it is dropped;
	% confirm the author list against the report.
	@techreport{buhr,
	  author      = {Buhr, Morten},
	  title       = {Newspaper layout aesthetics judged by artificial neural networks},
	  type        = {{ALCOM-IT} Technical Report},
	  number      = {TR-050-96},
	  institution = {Aarhus University},
	  year        = {1996},
	}
	
	% ICDAR 2001 paper. The booktitle was in inverted catalogue order and is
	% rewritten in reading order; initials given periods and spaces.
	@inproceedings{newsdoc,
	  author    = {Mitchell, P. E. and Yan, H.},
	  title     = {Newspaper document analysis featuring connected line segmentation},
	  booktitle = {Proceedings of the Sixth International Conference on Document Analysis and Recognition ({ICDAR} 2001)},
	  pages     = {1181--1185},
	  year      = {2001},
	}
	
	% Journal article; the entry keyed silva2006ags describes the same work, and
	% this key is kept so existing citations resolve. Initials are separated by
	% spaces so BibTeX sees each as its own name token.
	@article{dogeo,
	  author    = {Silva, M. J. and Martins, B. and Chaves, M. and Afonso, A. P. and Cardoso, N.},
	  title     = {Adding geographic scopes to web resources},
	  journal   = {Computers, Environment and Urban Systems},
	  volume    = {30},
	  number    = {4},
	  pages     = {378--399},
	  publisher = {Elsevier},
	  year      = {2006},
	}
	
	
	
